library(dplyr)
# For Model fitting
library(lme4)
library(nlme)
library(purrr)
# For diagnostics
library(performance)
# For adding new columns
library(tibble)# Load data
sys.source("./scripts/code_join_data_full_dataset.R", envir = knitr::knit_global())# Load functions
## Models
sys.source("./R/functions_models.R", envir = knitr::knit_global())Select performance and traits variables
Transform variables
data_for_models <-
data_for_models %>%
# Select variables for analysis
dplyr::select(c(1:7,9,11:ncol(data_for_models)))Models: Questions 1 and 2
\[response\sim treatment*fixer\ + initial\ height + random( 1|\ specie)\]
# Take response variables names
response_vars_q1_q2 <-
set_names(names(data_for_models)[5:14])
response_vars_q1_q2 total_biomass above_biomass below_biomass rgr amax
"total_biomass" "above_biomass" "below_biomass" "rgr" "amax"
gs wue d13c d15n pnue
"gs" "wue" "d13c" "d15n" "pnue"
models_q1_q2 <- map(response_vars_q1_q2, ~ mixed_model_1(response = .x,
data = data_for_models))model_q2_wue_log <- lmer(log(wue) ~ nfixer*treatment + init_height + (1|spcode),
data = data_for_models)
model_q2_pnue_log <- lmer(log(pnue) ~ nfixer*treatment + init_height + (1|spcode),
data = data_for_models)
log_models <- list(model_q2_wue_log, model_q2_pnue_log)
names(log_models) <- c("wue_log", "pnue_log")# Append log models to model list
models_q1_q2 <- append(log_models, models_q1_q2)Models Nodule count
- Chapter 9 Mixed models in ecology check glmmML package for count data
- GOOD ref https://www.dataquest.io/blog/tutorial-poisson-regression-in-r/
- #https://www.flutterbys.com.au/stats/tut/tut11.2a.html
# Load data
# This step was done like this because I am working with a subset of the data
# source cleaned data
source("./scripts/code_clean_data_nodules.R")
# Delete unused variables
data_nodules_cleaned <-
data_nodules_cleaned %>%
# add id to rownames for keep track of the rows
column_to_rownames("id") %>%
dplyr::select(spcode,treatment, everything())m4 lmer gaussian
lmer_gaussian <- lmer(number_of_root_nodulation ~ treatment + init_height +
(1 |spcode),
data = data_nodules_cleaned)lmer_gaussian_log <- lmer(log(number_of_root_nodulation) ~ treatment + init_height +
(1 |spcode),
data = data_nodules_cleaned)m5 glmer poisson
glmer_poisson <- glmer(number_of_root_nodulation ~ treatment + init_height +
(1 |spcode), family = "poisson",
data = data_nodules_cleaned)models_nodule_count <- list(lmer_gaussian, lmer_gaussian_log, glmer_poisson)
names(models_nodule_count) <- c("lmer_gaussian",
"lmer_gaussian_log",
"glmer_poisson")Mycorrhizal colonization
I decided not to include it, because I want to focus on Nfixing vrs non-Fixing,
also I don't trust on the dataModels: Question 3
\[performance\sim treatment:fixer:scale(trait)\ + initial\ height + random( 1|\ specie)\]
Scale preditors
data_for_models_scaled <-
data_for_models %>%
mutate(across(c(4,9:14),scale)) # Select traits (x_vars)
traits_names <-
set_names(names(data_for_models_scaled)[c(9:14)])
traits_names amax gs wue d13c d15n pnue
"amax" "gs" "wue" "d13c" "d15n" "pnue"
# Select plants performance vars (y_vars)
performance_names <-
set_names(names(data_for_models_scaled)[c(5:8)])
performance_names total_biomass above_biomass below_biomass rgr
"total_biomass" "above_biomass" "below_biomass" "rgr"
Models lme4::lmer
models_lmer_formulas <- model_combinations_formulas(performance_names, traits_names)
length(models_lmer_formulas)[1] 24
models_lmer_formulas[1]$`abovebiomass~amax`
above_biomass ~ treatment:nfixer:amax + init_height + (1 | spcode)
<environment: 0x7fa0499be218>
models_q3_lmer <- map(models_lmer_formulas,
~ lmer(.x, data = data_for_models_scaled))Models nlme::lme
models_nlme_formulas <- model_combinations_formulas(performance_names,
traits_names,
nlme = T)
length(models_nlme_formulas)[1] 24
models_nlme_formulas[1]$`abovebiomass~amax`
above_biomass ~ treatment:nfixer:amax + init_height
<environment: 0x7fa04ad86228>
models_q3_nlme <- map(models_nlme_formulas, ~model_nlme(., data = data_for_models_scaled))Validation plots
Collinearity
map(models_q1_q2, check_collinearity)$wue_log
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
nfixer 1.30 1.14 0.77
treatment 3.85 1.96 0.26
init_height 1.05 1.02 0.96
nfixer:treatment 4.57 2.14 0.22
$pnue_log
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
nfixer 1.30 1.14 0.77
treatment 3.85 1.96 0.26
init_height 1.05 1.02 0.96
nfixer:treatment 4.55 2.13 0.22
$total_biomass
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.86 1.97 0.26
nfixer 1.12 1.06 0.90
init_height 1.06 1.03 0.95
treatment:nfixer 4.12 2.03 0.24
$above_biomass
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.87 1.97 0.26
nfixer 1.06 1.03 0.94
init_height 1.06 1.03 0.94
treatment:nfixer 3.98 2.00 0.25
$below_biomass
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.86 1.96 0.26
nfixer 1.16 1.08 0.86
init_height 1.05 1.03 0.95
treatment:nfixer 4.22 2.06 0.24
$rgr
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.87 1.97 0.26
nfixer 1.05 1.02 0.95
init_height 1.06 1.03 0.94
treatment:nfixer 3.96 1.99 0.25
$amax
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.88 1.97 0.26
nfixer 1.03 1.01 0.97
init_height 1.07 1.03 0.94
treatment:nfixer 3.90 1.98 0.26
$gs
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.83 1.96 0.26
nfixer 1.92 1.39 0.52
init_height 1.03 1.01 0.97
Moderate Correlation
Term VIF Increased SE Tolerance
treatment:nfixer 6.08 2.47 0.16
$wue
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.85 1.96 0.26
nfixer 1.19 1.09 0.84
init_height 1.05 1.03 0.95
treatment:nfixer 4.29 2.07 0.23
$d13c
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.85 1.96 0.26
nfixer 1.20 1.09 0.84
init_height 1.05 1.03 0.95
treatment:nfixer 4.31 2.07 0.23
$d15n
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.84 1.96 0.26
nfixer 1.41 1.19 0.71
init_height 1.04 1.02 0.96
treatment:nfixer 4.84 2.20 0.21
$pnue
# Check for Multicollinearity
Low Correlation
Term VIF Increased SE Tolerance
treatment 3.84 1.96 0.26
nfixer 1.33 1.15 0.75
init_height 1.04 1.02 0.96
treatment:nfixer 4.62 2.15 0.22
# map(models_nodule_count, check_collinearity)#Warning: Model has interaction terms. VIFs might be inflated. You may check
#multicollinearity among predictors of a model without interaction terms.
#map(models_list_q3, check_mul)Bolker’s plots
bolker_validation <- function(model) {
a <- plot(model, type = c("p", "smooth"))
## heteroscedasticity
b <- plot(model,sqrt(abs(resid(.))) ~ fitted(.), type = c("p", "smooth"))
cowplot::plot_grid(a,b,nrow = 1)
}Models for questions 1 and 2
map(models_q1_q2, bolker_validation)$wue_log
$pnue_log
$total_biomass
$above_biomass
$below_biomass
$rgr
$amax
$gs
$wue
$d13c
$d15n
$pnue
Models for nodule count
map(models_nodule_count, bolker_validation)$lmer_gaussian
$lmer_gaussian_log
$glmer_poisson
Models for question 3
lme4 models
map(models_q3_lmer, bolker_validation)$`abovebiomass~amax`
$`abovebiomass~d13c`
$`abovebiomass~d15n`
$`abovebiomass~gs`
$`abovebiomass~pnue`
$`abovebiomass~wue`
$`belowbiomass~amax`
$`belowbiomass~d13c`
$`belowbiomass~d15n`
$`belowbiomass~gs`
$`belowbiomass~pnue`
$`belowbiomass~wue`
$`rgr~amax`
$`rgr~d13c`
$`rgr~d15n`
$`rgr~gs`
$`rgr~pnue`
$`rgr~wue`
$`totalbiomass~amax`
$`totalbiomass~d13c`
$`totalbiomass~d15n`
$`totalbiomass~gs`
$`totalbiomass~pnue`
$`totalbiomass~wue`
nlme models
map(models_q3_nlme, bolker_validation)$`abovebiomass~amax`
$`abovebiomass~d13c`
$`abovebiomass~d15n`
$`abovebiomass~gs`
$`abovebiomass~pnue`
$`abovebiomass~wue`
$`belowbiomass~amax`
$`belowbiomass~d13c`
$`belowbiomass~d15n`
$`belowbiomass~gs`
$`belowbiomass~pnue`
$`belowbiomass~wue`
$`rgr~amax`
$`rgr~d13c`
$`rgr~d15n`
$`rgr~gs`
$`rgr~pnue`
$`rgr~wue`
$`totalbiomass~amax`
$`totalbiomass~d13c`
$`totalbiomass~d15n`
$`totalbiomass~gs`
$`totalbiomass~pnue`
$`totalbiomass~wue`
Performance package
Models for questions 1,2
map(models_q1_q2, check_model)$wue_log
$pnue_log
$total_biomass
$above_biomass
$below_biomass
$rgr
$amax
$gs
$wue
$d13c
$d15n
$pnue
Models for nodule count
map(models_nodule_count, check_model)$lmer_gaussian
$lmer_gaussian_log
$glmer_poisson
Models for question 3
map(models_q3_lmer, check_model)$`abovebiomass~amax`
$`abovebiomass~d13c`
$`abovebiomass~d15n`
$`abovebiomass~gs`
$`abovebiomass~pnue`
$`abovebiomass~wue`
$`belowbiomass~amax`
$`belowbiomass~d13c`
$`belowbiomass~d15n`
$`belowbiomass~gs`
$`belowbiomass~pnue`
$`belowbiomass~wue`
$`rgr~amax`
$`rgr~d13c`
$`rgr~d15n`
$`rgr~gs`
$`rgr~pnue`
$`rgr~wue`
$`totalbiomass~amax`
$`totalbiomass~d13c`
$`totalbiomass~d15n`
$`totalbiomass~gs`
$`totalbiomass~pnue`
$`totalbiomass~wue`
Save lists with the models
saveRDS(models_q1_q2, file = "./processed_data/models_q1_q2.RData")
saveRDS(models_q3_lmer, file = "./processed_data/models_q3_3_way_interaction_lmer.RData")
saveRDS(models_q3_nlme, file = "./processed_data/models_q3_3_way_interaction_nlme.RData")
saveRDS(models_nodule_count, file = "./processed_data/models_nodule_count.RData")